setup

library(tidyverse)
library(magrittr)

library(limma)
library(GEOquery)
library(affy)
library(kableExtra)
library(ggfortify)
library(ggeasy)
library(mogene20stprobeset.db)
library(mogene20sttranscriptcluster.db)
library(oligo)
library(msigdbr)
library(readxl)
library(pheatmap)
library(RColorBrewer)
library(viridis)
library(UpSetR)
library(fgsea)
library(harmonicmeanp)
library(ggpubr)
library(ggpol)
library(cowplot)
library(gridExtra)
library(lattice)
library(grid)
library(arrayQualityMetrics)
library(factoextra)
library(cluster) 

# This code loads the flat violin function in the working environment
source("https://gist.githubusercontent.com/benmarwick/2a1bb0133ff568cbe28d/raw/fb53bd97121f7f9ce947837ef1a4c65a73bffb3f/geom_flat_violin.R")

Introduction

This document will contain my analysis of the microarray data of 12-months old male mice from two mouse models: AppNL-G-F/NL-G-F mice, harboring three pathogenic App-knock-in mutations that promote aggressive amyloidosis by increasing total Aβ production (Swedish mutation KM670/671NL), increasing the Aβ42/Aβ40 ratio (Iberian mutation I716F), and promoting Aβ aggregation through facilitating oligomerization and reducing proteolytic degradation (Arctic mutation E693G), under the control of the endogenous APP promoter

3xTg-AD-H mouse carrying two mutated human overexpressed transgenes, APP (Swedish KM670/671NL) and MAPT (P301L), driven by exogenous Thy1.2 promoter with a knock-in mutation of Psen1 (M146V), together with their correspondent controls (male, n=3 for each group).

It does not appear to be littermate controls.

Summary of samples:

geo <- getGEO(filename="data/AppNL-G-F.microarray/GSE92926_series_matrix.txt") %>%
  pData()

geo %>%
  dplyr::select(`genotype:ch1`, `age:ch1`, `tissue:ch1`, supplementary_file) %>%
  kable(caption = "summary of samples") %>%
  kable_styling()
summary of samples
genotype:ch1 age:ch1 tissue:ch1 supplementary_file
GSM2440481 AppNL-G-F/NL-G-F Homo 12 months of age Cortex ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM2440nnn/GSM2440481/suppl/GSM2440481_APPNL-G-F_1_MoGene-2_0-st_.CEL.gz
GSM2440482 AppNL-G-F/NL-G-F Homo 12 months of age Cortex ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM2440nnn/GSM2440482/suppl/GSM2440482_APPNL-G-F_2_MoGene-2_0-st_.CEL.gz
GSM2440483 AppNL-G-F/NL-G-F Homo 12 months of age Cortex ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM2440nnn/GSM2440483/suppl/GSM2440483_APPNL-G-F_3_MoGene-2_0-st_.CEL.gz
GSM2440484 App+/+ 12 months of age Cortex ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM2440nnn/GSM2440484/suppl/GSM2440484_APPWT_1_MoGene-2_0-st.CEL.gz
GSM2440485 App+/+ 12 months of age Cortex ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM2440nnn/GSM2440485/suppl/GSM2440485_APPWT_2_MoGene-2_0-st.CEL.gz
GSM2440486 App+/+ 12 months of age Cortex ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM2440nnn/GSM2440486/suppl/GSM2440486_APPWT_3_MoGene-2_0-st.CEL.gz
GSM2440487 3xTg-AD-H Homo 12 months of age Cortex ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM2440nnn/GSM2440487/suppl/GSM2440487_3xTg-AD-H_1_MoGene-2_0-st_.CEL.gz
GSM2440488 3xTg-AD-H Homo 12 months of age Cortex ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM2440nnn/GSM2440488/suppl/GSM2440488_3xTg-AD-H_2_MoGene-2_0-st_.CEL.gz
GSM2440489 3xTg-AD-H Homo 12 months of age Cortex ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM2440nnn/GSM2440489/suppl/GSM2440489_3xTg-AD-H_3_MoGene-2_0-st_.CEL.gz
GSM2440490 non-Tg 12 months of age Cortex ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM2440nnn/GSM2440490/suppl/GSM2440490_non-Tg_1_MoGene-2_0-st_.CEL.gz
GSM2440491 non-Tg 12 months of age Cortex ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM2440nnn/GSM2440491/suppl/GSM2440491_non-Tg_2_MoGene-2_0-st_.CEL.gz
GSM2440492 non-Tg 12 months of age Cortex ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM2440nnn/GSM2440492/suppl/GSM2440492_non-Tg_3_MoGene-2_0-st_.CEL.gz

import data

First I will obtain the expression data. Since this array is a MoGene T array, I will use the oligo package will be used to import the expression values from the .CEL files .

exp.cel <- 
  list.files("data/AppNL-G-F.microarray/celFiles", 
                      pattern = ".CEL", full.names = TRUE) %>% 
  read.celfiles()
## Reading in : data/AppNL-G-F.microarray/celFiles/GSM2440481_APPNL-G-F_1_MoGene-2_0-st_.CEL
## Reading in : data/AppNL-G-F.microarray/celFiles/GSM2440482_APPNL-G-F_2_MoGene-2_0-st_.CEL
## Reading in : data/AppNL-G-F.microarray/celFiles/GSM2440483_APPNL-G-F_3_MoGene-2_0-st_.CEL
## Reading in : data/AppNL-G-F.microarray/celFiles/GSM2440484_APPWT_1_MoGene-2_0-st.CEL
## Reading in : data/AppNL-G-F.microarray/celFiles/GSM2440485_APPWT_2_MoGene-2_0-st.CEL
## Reading in : data/AppNL-G-F.microarray/celFiles/GSM2440486_APPWT_3_MoGene-2_0-st.CEL
## Reading in : data/AppNL-G-F.microarray/celFiles/GSM2440487_3xTg-AD-H_1_MoGene-2_0-st_.CEL
## Reading in : data/AppNL-G-F.microarray/celFiles/GSM2440488_3xTg-AD-H_2_MoGene-2_0-st_.CEL
## Reading in : data/AppNL-G-F.microarray/celFiles/GSM2440489_3xTg-AD-H_3_MoGene-2_0-st_.CEL
## Reading in : data/AppNL-G-F.microarray/celFiles/GSM2440490_non-Tg_1_MoGene-2_0-st_.CEL
## Reading in : data/AppNL-G-F.microarray/celFiles/GSM2440491_non-Tg_2_MoGene-2_0-st_.CEL
## Reading in : data/AppNL-G-F.microarray/celFiles/GSM2440492_non-Tg_3_MoGene-2_0-st_.CEL
# tidy ip column names
colnames(exp.cel) %<>%
  str_remove("^GSM24404[:digit:]+_") %>%
  str_remove("_MoGene-2_0-st_.CEL") %>%
  str_remove("_MoGene-2_0-st.CEL")

targets <- colnames(exp.cel) %>% 
  as_tibble() %>% 
  set_colnames("sample") %>% 
  mutate(Genotype = case_when(
    grepl(sample, pattern = "APPNL") ~ "APP_NLGF", 
    grepl(sample, pattern = "APPWT") ~ "APP_WT", 
    grepl(sample, pattern = "3xTg") ~ "AD3xTg",
    grepl(sample, pattern = "non-Tg") ~ "Non_Tg") %>% 
      as.factor(), 
    Genotype = factor(Genotype, 
                      levels = c("APP_WT", "APP_NLGF", "Non_Tg", "AD3xTg"))
    )

pData(exp.cel) %<>% 
  as.data.frame() %>% 
  rownames_to_column("sample") %>% 
  left_join(targets) %>% 
  column_to_rownames("sample") 

Qualtiy control of the raw data

PCA plot

First I will perform principal component analysis on the raw data. Here we check for outliers and try to see whether the data clusters as expected, e.g. by the experimental conditions. in this plot of PC1 against PC2, samples do not overly seperate by genotype.

pca_raw <- log2(exprs(exp.cel)) %>%
  t() %>%
  prcomp() %>%
  autoplot(data = tibble(sample = rownames(.$x)) %>%
             left_join(targets),
           colour = "Genotype",
           size = 4) +
  theme_bw() +
  theme(aspect.ratio = 1)

pca_raw

Boxplot

exprs(exp.cel) %>%
  log2() %>%
    as.data.frame() %>%
    rownames_to_column("probeID") %>%
    gather(key = "sample", value = "expression",
           targets$sample) %>%
    left_join(pData(exp.cel) %>%
                as.data.frame() %>%
                rownames_to_column("sample")) %>%
    ggplot(aes(x = sample, y = expression)) +
    geom_boxplot(aes(fill = Genotype), width = 0.2) +
    geom_flat_violin(aes(fill = Genotype),
                     colour = NA,
                     position = position_nudge(x = .2)) +
    #ggtitle("Boxplot of expression values") +
    theme_bw() +
    easy_rotate_x_labels(angle = 315)

array quality metrics

Next I will use the arrayQualityMetrics package for more generate a report for assistance in detecting outliers. It will generate an additional control file containing the quality control plots together with a description of their aims and an identification of possible outliers. Inspection of this document did not reveal any outliers to be remvoed. This was only run on the first time, as it takes a long time to compile.

Background adjustment, calibration, summarization and annotation

The next step in the analysis is to perform background adjust,emt (i.e. remove background level noise), calibrate (i.e. normalise between arrays) and summarise the multiple probes per transcript to a gene level value.

The package oligo allows us to perform background correction, normalization and summarization in one single step using a deconvolution method for background correction, quantile normalization and the RMA (robust multichip average) algorithm for summarization.

This series of steps as a whole is commonly referred to as RMA algorithm, although strictly speaking RMA is merely a summarization method.

Relative Log Expression data quality analysis

Before calibrating and evaluating the data, we want to perform another quality control procedure, namely Relative Log Expression (RLE), To this end, we first perform an RMA without prior normalization. Then ,the RLE is performed by calculating the median log2 intensity of every transcript across all arrays.

This plot shows the deviation of expression intensity from the median expression for each microarray. If any of the boxplots from each sample have a larger extension, this indicates a high deviation from the median in a lot of transcripts, suggesting that these arrays are different from most of the others in some way. Boxes that are shifted in y-direction indicate a systematically higher or lower expression of the majority of transcripts in comparison to most of the other arrays. This could be caused by quality issues or batch effects. Therefore, if shape and median of a given box varies too much from the bulk, they should be inspected and potentially removed. Most of the boxplots appear similar, except for sample non-Tg_3 which seem slightly increased. However , it doesnt look too bad and we only have n = 3, so will retain it.

eset <- oligo::rma(exp.cel, target = "core", normalize = FALSE)
## Background correcting
## Calculating Expression
rowmedians <- eset %>% 
  exprs() %>% 
  as.matrix() %>% 
  rowMedians()

sweep(exprs(eset), 1, rowmedians) %>% 
  as.data.frame() %>% 
  gather(key = "sample",value = "log2_expression_deviation") %>% 
  ggplot(aes(x = sample, y = log2_expression_deviation)) +
  geom_boxplot(aes(fill = sample), outlier.shape = NULL) +
  theme_bw() +
  ggtitle("Boxplot for RLE values") +
  easy_rotate_x_labels(angle = -90)

RMA

eset_norm <- oligo::rma(exp.cel, target = "core")
## Background correcting
## Normalizing
## Calculating Expression

Clustering analysis

PCA on RMA normalised data

After RMA normalisation, samples now mostly cluster by genotype across PC2.

ggarrange(
  pca_raw +
    ggtitle("Before RMA"), 
  
  exprs(eset_norm) %>% 
    t() %>% 
    prcomp() %>% 
    autoplot(data = tibble(sample = rownames(.$x)) %>%
               left_join(targets), 
             colour = "Genotype", 
             fill = "Genotype",
             size = 4) +
    theme_bw() +
    theme(aspect.ratio = 1) +
    ggtitle("After RMA"), 
  nrow = 1, 
  common.legend = TRUE
)

Clustering analysis

Next, I want to see how well samples cluster based on their genotype using clustering. First, I will calculate sample:sample distances using the dist function. Rather than the Euclidean distance, i will use the Manhattan distance, which uses absolute distances along rectangular paths instead of squared distances of the direct path, as it is more robust. Samples do not overly cluster based on their Manhatten distance.

dists <- exprs(eset_norm) %>% 
  t() %>% 
  dist(method = "manhattan") %>% 
  as.matrix()

hmcol <- viridis_pal(option = "magma")(255)

diag(dists) <- NA

pheatmap(dists, 
         col = (hmcol), 
         annotation_row = pData(eset_norm) %>% 
           dplyr::select(Genotype),
         legend = TRUE, 
         treeheight_row = 0,
         cellwidth = 10, 
         cellheight = 10,
         legend_breaks = c(min(dists, na.rm = TRUE), 
                         max(dists, na.rm = TRUE)), 
         legend_labels = (c("small distance", "large distance")),
         main = "Clustering heatmap for the calibrated samples based on their Manhatten distance"
         )

The clustering was repeated using the Euclidean distance. Samples clustered together a bit better using this method.

dists_eucl<- exprs(eset_norm) %>% 
  t() %>% 
  dist() %>% 
  as.matrix()

diag(dists_eucl) <- NA
pheatmap(dists_eucl, 
         col = (hmcol), 
         annotation_row = pData(eset_norm) %>% 
           dplyr::select(Genotype),
         legend = TRUE, 
         treeheight_row = 0,
         cellwidth = 10, 
         cellheight = 10,
         legend_breaks = c(min(dists, na.rm = TRUE), 
                         max(dists, na.rm = TRUE)), 
         legend_labels = (c("small distance", "large distance")),
         main = "Clustering heatmap for the calibrated samples based on their Euclidean distance"
         )

Filtering based on intensity

Now, I will filter out lowly expressed genes. Microarray data commonly show a large number of probes in the background intensity range. These probes also do not change much across arrays. Hence they combine a low variance with a low intensity. Thus, they could end up being detected as differentially expressed although they are barely above the “detection” limit and are not very informative in general.

We will perform a “soft” intensity based filtering here, since this is recommended by the limma user guide.

medians <- rowMedians(exprs(eset_norm))

hist(medians, 100, col = "cornsilk1", freq = FALSE, 
            main = "Histogram of the median intensities", 
            border = "antiquewhite4",
            xlab = "Median intensities")

I will choose the threshold of 3,5 as the peak seems to stop about here. Transcripts that do not have intensities larger than the threshold in at least 3 arrays are excluded (as 3 is the number of samples in each group).

man_threshold <- 3.5

hist_res <- hist(medians, 100, col = "cornsilk", freq = FALSE, 
            main = "Histogram of the median intensities",
            border = "antiquewhite4",
            xlab = "Median intensities")

abline(v = man_threshold, col = "coral4", lwd = 2)

samples_cutoff <- 3

idx_man_threshold <- 
  apply(exprs(eset_norm), 1,
        function(x){
          sum(x > man_threshold) >= samples_cutoff})

table(idx_man_threshold) %>% 
  as_tibble() %>% 
  set_colnames(c("Expression above threshold?", "Number of genes")) %>% 
  kable() %>% 
  kable_styling()
Expression above threshold? Number of genes
FALSE 4830
TRUE 36515
exp.filtered <- subset(eset_norm, idx_man_threshold)

Annotation of the transcript clusters

Before we continue with the linear models for microarrays and differential expression, we first add “feature data”, i.e. annotation information to the transcript cluster identifiers stored in the featureData of our ExpressionSet. The anno_2 object was downloaded from the thermo scientific website.

anno <- AnnotationDbi::select(mogene20sttranscriptcluster.db,
                              keys = (featureNames(exp.filtered)),
                              columns = c("SYMBOL", "GENENAME", "ENSEMBL"),
                              keytype = "PROBEID")

anno <- subset(anno, !is.na(SYMBOL))

anno_2 <- 
  read_csv("data/AppNL-G-F.microarray/MoGene-2_0-st-v1.na36.mm10.transcript.csv", skip = 23) %>% 
   mutate(probeset_id = as.character(probeset_id))

1408transcript-cluster identifiers map to multiple gene symbols, i.e. they can’t be unambigously assigned. For this analysis, I will just omit these genes from the analysis

multimapppers <- anno %>% 
  group_by(PROBEID) %>% 
  summarise(no_of_matches = n_distinct(SYMBOL)) %>% 
  dplyr::filter( no_of_matches > 1) %>% 
  .$PROBEID

ids_to_exlude <- (featureNames(exp.filtered) %in% multimapppers)

table(ids_to_exlude)
## ids_to_exlude
## FALSE  TRUE 
## 35107  1408
final.eset <- subset(exp.filtered, !ids_to_exlude)

nonames <- exprs(exp.filtered) %>% 
  rownames() %>% 
  as_tibble() %>% 
  set_colnames("PROBEID") %>% 
  left_join(anno) %>% 
  dplyr::filter(is.na(SYMBOL)) %>% 
  .$PROBEID

ids_to_exlude2 <- (featureNames(final.eset) %in% nonames)


final.eset <- subset(final.eset, !ids_to_exlude2)  

check sex

The study claims that all mice were male in this dataset. To double check this, I will make sure that all samples express genes from the Y-chromosome. All samples express genes from the Y chromosome and therefore are all males.

final.eset %>% 
  exprs() %>% 
  as.data.frame() %>% 
  rownames_to_column("probeset_id") %>%
  left_join(anno_2) %>% 
  dplyr::filter(seqname %in% c("chrX", "chrY")) %>% 
  gather(key = "sample", value = "expression", targets$sample) %>% 
  ggplot(aes(x = sample, y = expression)) + 
  geom_boxplot(aes(fill = seqname)) +
  easy_rotate_x_labels(angle = -45)

PCA on final expression

A final check of the overall similarity between samples, which now cluster pretty nicely by genotype.

exprs(final.eset) %>% 
    t() %>% 
    prcomp() %>% 
    autoplot(data = tibble(sample = rownames(.$x)) %>%
               left_join(targets), 
             colour = "Genotype", 
             fill = "Genotype",
             size = 4) +
    theme_bw() +
    theme(aspect.ratio = 1) +
    ggtitle("After pre-processing")

Differential expression

To test which genes are differentially expressed (DE) in the pairwise contrasts of probes between the KI vs wt, and the Tg vs non-Tg, I will use the package limma. A linear model is fitted to the expression data for each gene. Empirical Bayes and other methods are used to borrow information across genes for the residual variance estimation leading to “moderated” t-statistics, and stabilizing the analysis for experiments with just a small number of arrays. A design matrix was generated, which tells limma which mouse belongs to which genotype. Then, a contrasts matrix is generated to define the pairwise contrasts.

Finally, the toptable is called as the final DE results.

design <- model.matrix(~0 + Genotype, data = pData(final.eset)) %>% 
  set_colnames(gsub(pattern = "Genotype", replacement = "", colnames(.)))

contrasts <- makeContrasts(Tg = `AD3xTg` -`Non_Tg`, 
              KI = `APP_NLGF` - `APP_WT` ,
              levels=design)

fit <- lmFit(final.eset, design) %>% 
  contrasts.fit(contrasts) %>% 
  eBayes(robust = TRUE)


toptable <- colnames(contrasts) %>% 
  sapply(function(x){
    topTable(fit, coef = x,
             adjust.method = "fdr", number =  Inf) %>% 
      as.data.frame() %>% 
      rownames_to_column("PROBEID") %>% 
      as_tibble() %>%
      left_join(anno, by = ("PROBEID")) %>%
      dplyr::select(SYMBOL, everything()) %>%
      mutate(DE = adj.P.Val < 0.05,
             coef = x)
  }, simplify = FALSE)

# toptable %>% 
#   lapply(function(x) {
#     x %>% 
#       dplyr::filter(DE == TRUE) %>% 
#       nrow()
#   })

A total of 126 genes were found to be DE in the 3xTg mice compared to controls, and 158 genes were found to be DE in the APP NLGF mice compared to controls. A few plots below summarise the DGE analysis

Visualisation

Venn diagram

decideTests(fit) %>% vennCounts() %>% vennDiagram()
Four DE genes are shared between the Tg and KI mice

Four DE genes are shared between the Tg and KI mice

final.eset[c(intersect(toptable$KI %>% 
                           dplyr::filter(DE == TRUE) %>% 
                           .$PROBEID, 
                         toptable$Tg %>% 
                           dplyr::filter(DE == TRUE) %>% 
                           .$PROBEID)),] %>% 
  exprs %>% 
  as.data.frame() %>% 
  rownames_to_column("PROBEID") %>% 
  left_join(anno) %>% 
  gather("sample", "expression", targets$sample) %>% 
  left_join(targets) %>% 
  mutate(Genotype = factor(Genotype, levels = c("APP_WT", "APP_NLGF", "Non_Tg", "AD3xTg"))) %>% 
  ggplot(aes(x = Genotype, y = expression)) +
  geom_jitter() +
  geom_boxplot(aes(colour = Genotype)) +
  facet_wrap(~SYMBOL) +
  theme_bw() +
  easy_rotate_x_labels(angle = -45) +
  scale_y_continuous(limits = c(5, 12.5)) 
Four DE genes are shared between the Tg and KI mice

Four DE genes are shared between the Tg and KI mice

  #ggsave("plots/sharedDEgenes.png", width = 10, height = 7, units = "cm", dpi = 600, scale = 1.5)

Volcano plot

toptable %>% 
  bind_rows() %>% 
  mutate(DE_dir = case_when(
    DE == TRUE & sign(logFC) == 1 ~ "Upregulation\n(FDR < 0.05)", 
    DE == TRUE & sign(logFC) == -1 ~ "Downregulation\n(FDR < 0.05)", 
    TRUE ~ "No significant change"
  )) %>% 
    mutate(coef = case_when(coef == "KI" ~ "APP NL-G-F", 
                          coef == "Tg" ~ "3xTg")) %>% 
  ggplot(aes(x = logFC, y = -log10(P.Value))) + 
  geom_point(aes(colour = DE_dir), 
             alpha = 0.5) +
  facet_wrap(~coef) +
  theme_bw() +
  labs(colour = "Probes with significant:")+
  scale_color_manual(values = c("darkblue", "grey30", "red"))+
  theme(legend.position = "bottom", plot.margin = unit(c(1, 2, 2, 4), "cm")) 

toptable %>% 
  bind_rows() %>% 
  mutate(DE_dir = case_when(
    DE == TRUE & sign(logFC) == 1 ~ "Upregulation\n(FDR < 0.05)", 
    DE == TRUE & sign(logFC) == -1 ~ "Downregulation\n(FDR < 0.05)", 
    TRUE ~ "No significant change"
  )) %>% 
    mutate(coef = case_when(coef == "KI" ~ "APP NL-G-F", 
                          coef == "Tg" ~ "3xTg")) %>% 
  ggplot(aes(x = AveExpr, y = logFC)) + 
  geom_point(aes(colour = DE_dir), 
             alpha = 0.5) +
  facet_wrap(~coef, nrow = 2) +
  theme_bw() +
  labs(colour = "Probes with significant:")+
  scale_color_manual(values = c("darkblue", "grey30", "red"))+
  theme(legend.position = "bottom", plot.margin = unit(c(1, 2, 2, 4), "cm"))

Enrichment

define the kegg and IRE gene sets by gene name.

Only genes tested in the DE analysis are retained in the gene sets,

KEGG <- 
  msigdbr("Mus musculus", category = "C2", subcategory = "CP:KEGG") %>% 
  dplyr::rename("SYMBOL" = "gene_symbol" ) %>% 
  dplyr::filter(SYMBOL %in% toptable$KI$SYMBOL) %>% 
  dplyr::filter(!is.na(SYMBOL)) %>% 
  distinct(gs_name, SYMBOL, .keep_all = TRUE) %>% 
  split(f = .$gs_name) %>%
  lapply(extract2, "SYMBOL")

# a file downloaded from biomart
biomart <- read_tsv("data/AppNL-G-F.microarray/mart_export (2).txt") 
  
ireGenes <- 
  read_excel("data/ireGenes.xlsx", sheet = "Mouse IREs") %>% 
    gather(key = "ire", value = "gene_id") %>% 
    left_join(biomart %>% 
                dplyr::select(gene_id =  `Gene stable ID`, 
                              `Gene name`) %>% 
                unique()
    ) %>% 
    dplyr::filter(`Gene name` %in% toptable$Tg$SYMBOL) %>% 
    split(f = .$ire) %>% 
    lapply(magrittr::extract2, "Gene name")

ORA using kegga

ora <-  toptable %>% 
  sapply(function(x){ 
    kegga(de = x %>% 
            dplyr::filter(DE == TRUE) %>% 
            .$SYMBOL, 
          universe = x %>% 
            .$SYMBOL, 
          gene.pathway = KEGG %>% 
            unlist %>%
            as.data.frame() %>%
            rownames_to_column("pathway") %>%
            set_colnames(c("pathway", "gene_id")) %>%
            mutate(pathway = pathway %>% str_remove_all("[0-9]+$")) %>%
            dplyr::select(gene_id, pathway) %>% 
            bind_rows(ireGenes %>% 
                        unlist %>%
                        as.data.frame() %>%
                        rownames_to_column("pathway") %>%
                        set_colnames(c("pathway", "gene_id")) %>%
                        mutate(pathway = pathway %>%
                                 str_remove_all("[0-9]+$")) %>%
                        dplyr::select(gene_id, pathway))
    ) %>% 
      topKEGG(number = Inf ) %>% 
      as.data.frame() %>% 
      rownames_to_column("pathway") %>% 
      as_tibble() %>% 
      mutate(FDR = p.adjust(P.DE, "fdr"), 
             bonf = p.adjust(P.DE, "bonf"),
             sig = FDR < 0.05, 
             coef = unique(x$coef)) %>% 
      dplyr::select(pathway, raw.p = P.DE, FDR, bonf, sig, DE, N, coef)
  }, simplify = FALSE)

sigGenesets <- ora %>% 
  bind_rows() %>% 
  dplyr::filter(bonf < 0.05) %>% 
  .$pathway %>% 
  unique
  

ora %>% 
  bind_rows() %>% 
  dplyr::filter(pathway %in% sigGenesets) %>% 
  mutate(coef = case_when(coef == "KI" ~ "APP NL-G-F", 
                          coef == "Tg" ~ "3xTg")) %>% 
  mutate(N = case_when(
    coef == "3xTg" ~ N*-1, 
    TRUE ~ N
  ), 
  DE = case_when(
    coef == "3xTg" ~ DE*-1, 
    TRUE ~ DE
  )) %>% 
  ggplot(aes(y = reorder(pathway, abs(N)))) +
  geom_col(aes(x = N), fill = "grey") +
  geom_col(aes(x = DE), fill = "orchid3") +
  geom_point(x = -10,
             size = 2,
             data = . %>% 
               dplyr::filter(coef == "APP NL-G-F" & bonf < 0.05)) +
  geom_point(x = 10, 
             fill = "magenta",
             size = 2,
             data = . %>% 
               dplyr::filter(coef == "3xTg" & bonf < 0.05)) +
  facet_share(~coef, scales = "free_x") +
  theme_bw() +
  labs(x = "Number of genes in geneset",
       fill = "Number of DE genes in gene set",
       y = "gene set") 

  #ggtitle("Over-represented KEGG and IRE gene sets", subtitle = "FDR < 0.05")
KEGG[ora$KI %>% 
  dplyr::filter(bonf < 0.05) %>% 
  .$pathway] %>% 
  lapply(function(x) {
    x %>% 
      intersect(toptable$KI %>% dplyr::filter(DE == TRUE) %>% .$SYMBOL)
  }) %>% 
  fromList() %>% 
  upset(order.by = 'freq', 
        nsets = length(.), )
Distribution of DE genes in homozygous knock in mouse brains

Distribution of DE genes in homozygous knock in mouse brains

ranked lists

For enrichment analysis, I will perform enrichment testing on the HMP of fry, camera and fgsea. Since a single gene can have the multiple probes, I will use the probe with the highest expression as the respresentative probe for that particular gene.

enrichInput <- 
  exprs(final.eset) %>% 
  as.data.frame() %>% 
  rownames_to_column("PROBEID") %>% 
  as_tibble() %>% 
  left_join(toptable$KI %>% dplyr::select(PROBEID, SYMBOL)) %>%
  gather(key = 'sample', value = 'expression', targets$sample) %>% 
  group_by(SYMBOL, sample) %>% 
  summarise(expression = max(expression)) %>% 
  spread(key ='sample', value = 'expression' ) %>% 
  ungroup() %>% 
  column_to_rownames("SYMBOL")

fryRes <- colnames(contrasts) %>% 
  sapply(function(x) {
    enrichInput  %>% 
      fry(index = c(KEGG,ireGenes), 
          design = design, 
          contrast = contrasts[,x], 
          sort = "mixed"
      ) %>% 
      rownames_to_column('geneset') %>% 
      mutate(
        coef = x) %>% 
      as_tibble()
  }, simplify = FALSE)

camerares <- colnames(contrasts) %>% 
  sapply(function(x) {
    
    enrichInput %>% 
      camera(index = c(KEGG,ireGenes), 
             design = design, 
             contrast = contrasts[,x]
      ) %>% 
      rownames_to_column('geneset') %>% 
      mutate(
        coef = x) %>% 
      as_tibble()
  }, simplify = FALSE)

ranks <- sapply(toptable, function(x) {
  x %>% 
    dplyr::filter(SYMBOL %in% rownames(enrichInput)) %>% 
    arrange(t) %>% 
    dplyr::select(c("SYMBOL", "t")) %>% #only want the Pvalue with sign
    with(structure(t, names = SYMBOL)) %>% 
    rev() # reverse so the start of the list is upregulated genes
}, simplify = FALSE)

fgseares <- ranks %>% 
  lapply(function(x) {
    fgseaMultilevel(x, pathways =  c(KEGG, ireGenes)) %>% 
      as_tibble() %>%
      dplyr::rename(FDR = padj) %>%
      mutate(padj = p.adjust(pval, "bonferroni"), 
             sig = padj < 0.05) %>%
      dplyr::select(pathway, pval, FDR, padj, everything()) %>%
      arrange(pval) 
  })

fgseares$KI %<>% 
  mutate(coef = "KI")

fgseares$Tg %<>% 
  mutate(coef = "Tg")

HMP <- 
  fryRes %>% 
  bind_rows() %>% 
  dplyr::select(geneset, PValue, coef) %>% 
  dplyr::rename(fry_p = PValue) %>% 
  left_join(camerares %>% 
              bind_rows() %>% 
              dplyr::select(geneset, PValue, coef), 
            by = c("geneset", "coef")) %>% 
  dplyr::rename(camera_p = PValue) %>% 
  left_join(fgseares %>% 
              bind_rows() %>% 
              dplyr::select(c(geneset = pathway), pval, coef), 
            by = c("geneset", "coef")) %>% 
  dplyr::rename(fgsea_p = pval) %>% 
  bind_rows() %>% 
  nest(p = one_of(c("fry_p", "camera_p", "fgsea_p"))) %>% 
 mutate(harmonic_p = vapply(p, function(x){
        x <- unlist(x)
        x <- x[!is.na(x)]
        p.hmp(x, L = 4)
      }, numeric(1))
    ) %>% 
  unnest() %>% 
  mutate(harmonic_p_FDR = p.adjust(harmonic_p, "fdr"), 
         hmp_bonf = p.adjust(harmonic_p, "bonf"),
         sig = harmonic_p_FDR < 0.05) %>% 
  arrange(harmonic_p)  

HMP
## # A tibble: 380 x 9
##    geneset coef    fry_p camera_p  fgsea_p harmonic_p harmonic_p_FDR hmp_bonf
##    <chr>   <chr>   <dbl>    <dbl>    <dbl>      <dbl>          <dbl>    <dbl>
##  1 KEGG_L… KI    1.29e-6 3.67e-16 1.00e-10   1.10e-15       4.19e-13 4.19e-13
##  2 KEGG_L… KI    9.18e-7 3.59e-12 1.00e-10   1.04e-11       1.97e- 9 3.95e- 9
##  3 KEGG_S… KI    4.03e-5 2.05e- 6 1.00e-10   3.00e-10       2.85e- 8 1.14e- 7
##  4 KEGG_C… KI    3.98e-6 1.74e- 4 1.00e-10   3.00e-10       2.85e- 8 1.14e- 7
##  5 KEGG_A… KI    2.58e-5 1.82e-10 8.96e- 9   5.34e-10       4.06e- 8 2.03e- 7
##  6 KEGG_T… KI    3.16e-6 2.17e-10 1.08e- 6   6.49e-10       4.11e- 8 2.47e- 7
##  7 KEGG_H… KI    6.93e-5 3.64e- 4 5.45e-10   1.63e- 9       8.87e- 8 6.21e- 7
##  8 KEGG_C… KI    1.07e-3 1.47e- 4 4.85e- 9   1.45e- 8       6.91e- 7 5.53e- 6
##  9 KEGG_M… Tg    3.47e-2 1.12e- 1 7.75e- 9   2.33e- 8       9.82e- 7 8.84e- 6
## 10 KEGG_C… KI    7.85e-6 6.05e- 8 8.39e- 3   1.80e- 7       6.85e- 6 6.85e- 5
## # … with 370 more rows, and 1 more variable: sig <lgl>
sigpaths <-HMP %>% 
  dplyr::filter(hmp_bonf < 0.05) %>% .$geneset %>% unique()

HMP %>% 
  dplyr::filter(geneset %in% sigpaths) %>% 
  mutate(coef = case_when(coef == "KI" ~ "APP NL-G-F", 
                          coef == "Tg" ~ "3xTg") 
         ) %>% 
  ggplot(aes(y = geneset , x = -log10(harmonic_p))) +
  geom_col(aes(fill = hmp_bonf < 0.05)
           ) +
  scale_fill_viridis_d(end = .5) +
  theme_bw() +
  facet_share(~coef, reverse_num = T) +
  labs(x = "-log10(harmonic_p)",
       fill = "Bonferroni-adjusted\nharmonic_p < 0.05",
       y = "gene set") +
  theme(legend.position = "bottom") 

  #ggsave("plots/HMP.png", width = 30, height = 20, units = "cm", dpi = 800, scale = 1.3)

upset plots of leading edge genes and DE genes

a <- KEGG[HMP %>% 
  dplyr::filter(coef == "KI" & hmp_bonf < 0.05) %>% 
  .$geneset] %>% 
  lapply(function(x) {
    x %>% 
      intersect(toptable$KI %>% dplyr::filter(DE == TRUE) %>% .$SYMBOL)
  }) %>% 
  fromList() %>% 
  upset(order.by = 'freq', 
        nsets = length(.)
  )

b <- fgseares$KI %>% 
  dplyr::filter(pathway %in% c(HMP %>% 
                                 dplyr::filter(coef == "KI" & hmp_bonf < 0.05) %>% .$geneset)) %>% 
  dplyr::select(pathway, leadingEdge) %>%  
  unnest %>%  
  split(f = .$pathway) %>% 
  lapply(magrittr::extract2,"leadingEdge")  %>% 
  fromList() %>% 
  upset(nsets = length(.), 
        order.by = 'freq'
        )

# convert to cowplots for easier plotting and exporting. 
a_cow <- cowplot::plot_grid(NULL, a$Main_bar, a$Sizes, a$Matrix,
                            nrow=2, align='hv', rel_heights = c(1,1),
                            labels = "DE genes",
                           rel_widths = c(3,4))

b_cow <- cowplot::plot_grid(NULL, b$Main_bar, b$Sizes, b$Matrix,
                            nrow=2, align='hv', rel_heights = c(1,1), 
                            labels = "Leading edge genes",
                           rel_widths = c(3,4)) 

a_cow

b_cow

# cell type check

cell_type_markers <- read_xls("data/cell_type_genes41586_2007_BFnature05453_MOESM11_ESM.xls") %>%
  as_tibble() %>%
 dplyr::select(c("Neuron-enriched", "Astrocyte-enriched", "Oligodendrocyte-enriched")) %>%
  gather(key = "cell_type", value = "gene_name") %>% 
  dplyr::filter(gene_name %in% rownames(enrichInput)) %>% 
  split(f = .$cell_type) %>% 
  lapply(extract2, "gene_name")


cell_type_markers$`Microglia-enriched` <- read_excel("data/microglia_oosterofetal.xlsx", sheet = 1) %>%
  rbind(read_excel("data/microglia_oosterofetal.xlsx", sheet = 2)) %>%
  rbind(read_excel("data/microglia_oosterofetal.xlsx", sheet = 3)) %>%
  .$Mouse.Associated.Gene.Name  %>%
  unique

ggarrange(
  colnames(contrasts) %>% 
    sapply(function(x) {
      enrichInput  %>% 
        fry(index = cell_type_markers, 
            design = design, 
            contrast = contrasts[,x], 
            sort = "directional"
        ) %>% 
        rownames_to_column('geneset') %>% 
        mutate(
          coef = x) %>% 
        as_tibble()
    }, simplify = FALSE) %>% 
    bind_rows() %>% 
      mutate(coef = case_when(coef == "KI" ~ "APP NL-G-F", 
                              coef == "Tg" ~ "3xTg")) %>% 
    dplyr::select(geneset, coef,  NGenes, PValue, FDR, Direction) %>% 
    ggplot(aes(x = -log10(FDR), y = reorder(geneset, FDR))) +
    geom_col() +
    geom_vline(xintercept = -log10(0.05)) +
    facet_wrap(~coef) +
    theme_bw() +
    labs(y = "marker gene set", 
         x = "-log10(FDR from fry)"), 
  ggarrange(
    enrichInput %>% 
      .[c(cell_type_markers$`Astrocyte-enriched`),] %>% 
      gather(key = "sample", value = "Expression", targets$sample) %>%
      left_join(targets) %>% 
      mutate(Genotype = factor(Genotype, levels = c("APP_WT", "APP_NLGF", "Non_Tg", "AD3xTg"))) %>% #group_by(Genotype) %>% summarise(mean = mean(Expression)) to work out mean of WT for geom_hline
      ggplot(aes(x = Genotype, y = Expression)) +
      geom_flat_violin( aes(fill = Genotype),
                        position = position_nudge(x = 0.2, y = 0),
                        alpha = 0.75,
                        colour = NA, 
                        trim = FALSE) +
      geom_point(aes(colour = Genotype), 
                 position = position_jitter(width = 0.15), size = 2, alpha = 0.75 ) +
      geom_boxplot(width = 0.2, 
                   outlier.shape = NA, 
                   alpha = 0.7, 
                   aes(colour = Genotype)
      ) +
      theme_bw() +
      stat_summary(geom= "point", fun = "mean", shape = 23, size = 4, fill = 'red') +
      geom_hline(yintercept =  8.05, linetype = 2) + # mean of WT
      labs(x = "",
           fill = "Genotype") +
      ggtitle("Astrocytes"), 
    
    enrichInput %>% 
      .[c(cell_type_markers$`Neuron-enriched`),] %>% 
      gather(key = "sample", value = "Expression", targets$sample) %>%
      left_join(targets) %>% #group_by(Genotype) %>% summarise(mean = mean(Expression))
      mutate(Genotype = factor(Genotype, levels = c("APP_WT", "APP_NLGF", "Non_Tg", "AD3xTg"))) %>%
      ggplot(aes(x = Genotype, y = Expression)) +
      geom_flat_violin( aes(fill = Genotype),
                        colour = NA,
                        position = position_nudge(x = 0.2, y = 0),
                        alpha = 0.75,
                        trim = FALSE) +
      geom_point(aes(colour = Genotype), 
                 position = position_jitter(width = 0.15), size = 2, alpha = 0.75 ) +
      geom_boxplot(width = 0.2, 
                   outlier.shape = NA, 
                   alpha = 0.7, 
                   aes(colour = Genotype)
      ) +
      theme_bw() +
      stat_summary(geom= "point", fun = "mean", shape = 23, size = 4, fill = 'red') +
      geom_hline(yintercept =  9.87, linetype = 2) +
      labs(x = "",
           fill = "Genotype") +
      ggtitle("Neurons"),
    
    enrichInput %>% 
      .[c(cell_type_markers$`Oligodendrocyte-enriched`),] %>% 
      gather(key = "sample", value = "Expression", targets$sample) %>%
      left_join(targets) %>% 
      mutate(Genotype = factor(Genotype, levels = c("APP_WT", "APP_NLGF", "Non_Tg", "AD3xTg"))) %>% #group_by(Genotype) %>% summarise(mean = mean(Expression))
      ggplot(aes(x = Genotype, y = Expression)) +
      geom_flat_violin( aes(fill = Genotype), 
                        colour = NA,
                        position = position_nudge(x = 0.2, y = 0),
                        alpha = 0.75,
                        trim = FALSE) +
      geom_point(aes(colour = Genotype), 
                 position = position_jitter(width = 0.15), size = 2, alpha = 0.75 ) +
      geom_boxplot(width = 0.2, 
                   outlier.shape = NA, 
                   alpha = 0.7, 
                   aes(colour = Genotype)
      ) +
      geom_hline(yintercept = 8.24, linetype = 2) +
      stat_summary(geom= "point", fun = "mean", shape = 23, size = 4, fill = 'red') +
      theme_bw() +
      labs(x = "",
           fill = "Genotype") +
      ggtitle("Oligodendrocytes"), 
    
    enrichInput %>% 
      .[c(cell_type_markers$`Microglia-enriched`),] %>% 
      gather(key = "sample", value = "Expression", targets$sample) %>%
      left_join(targets) %>% 
      mutate(Genotype = factor(Genotype, levels = c("APP_WT", "APP_NLGF", "Non_Tg", "AD3xTg"))) %>% 
      na.omit %>% #group_by(Genotype) %>% summarise(mean = mean(Expression))
      ggplot(aes(x = Genotype, y = Expression)) +
      geom_flat_violin( aes(fill = Genotype),
                        position = position_nudge(x = 0.2, y = 0),
                        colour = NA, 
                        alpha = 0.75,
                        trim = FALSE) +
      geom_point(aes(colour = Genotype), 
                 position = position_jitter(width = 0.15), size = 2, alpha = 0.75 ) +
      geom_boxplot(width = 0.2, 
                   outlier.shape = NA, 
                   alpha = 0.7, 
                   aes(colour = Genotype)
      ) +
      theme_bw() +
      stat_summary(geom= "point", fun = "mean", shape = 23, size = 4, fill = 'red') +
      geom_hline(yintercept = 5.86, linetype = 2) +
      labs(x = "",
           fill = "Genotype") +
      ggtitle("Microglia"), 
    common.legend = TRUE, legend = "none" ), 
  labels = "AUTO", 
  common.legend = F, 
  ncol = 1, 
  heights = c(1, 4)
) 

c <- list(DE_genes_APP_NLGF = toptable$KI %>% 
                  dplyr::filter(DE == TRUE) %>% 
                  .$SYMBOL, 
     `Astrocyte-enriched` = cell_type_markers$`Astrocyte-enriched`, 
     `Neuron-enriched` = cell_type_markers$`Neuron-enriched`, 
     `Oligodendrocyte-enriched`  = cell_type_markers$`Oligodendrocyte-enriched`, 
     `Microglia-enriched` = cell_type_markers$`Microglia-enriched`
     ) %>% 
  fromList() %>% 
  upset(order.by = "freq", 
        queries = list(
          list(
            query = intersects, 
            params = list(
              "DE_genes_APP_NLGF", "Microglia-enriched"
            ), 
            active = TRUE
          )
        ))

d <- list(DE_genes_3xTg = toptable$Tg %>% 
                  dplyr::filter(DE == TRUE) %>% 
                  .$SYMBOL, 
     `Astrocyte-enriched` = cell_type_markers$`Astrocyte-enriched`, 
     `Neuron-enriched` = cell_type_markers$`Neuron-enriched`, 
     `Oligodendrocyte-enriched`  = cell_type_markers$`Oligodendrocyte-enriched`, 
     `Microglia-enriched` = cell_type_markers$`Microglia-enriched`
     ) %>% 
  fromList() %>% 
  upset(order.by = "freq", 
    
        )


# convert to cowplots for easier plotting and exporting. 
c_cow <- cowplot::plot_grid(NULL, c$Main_bar, c$Sizes, c$Matrix,
                            nrow=2, align='hv', rel_heights = c(1,1),
                            labels = "APP NLGF",
                           rel_widths = c(3,4))

d_cow <- cowplot::plot_grid(NULL, d$Main_bar, d$Sizes, d$Matrix,
                            nrow=2, align='hv', rel_heights = c(1,1), 
                            labels = "3xTg",
                           rel_widths = c(3,4)) 

plot_grid(c_cow, d_cow, ncol = 2,  nrow = 1 ) 

Session info

sessionInfo()
## R version 4.0.2 (2020-06-22)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Mojave 10.14.3
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_AU.UTF-8/en_AU.UTF-8/en_AU.UTF-8/C/en_AU.UTF-8/en_AU.UTF-8
## 
## attached base packages:
##  [1] grid      stats4    parallel  stats     graphics  grDevices utils    
##  [8] datasets  methods   base     
## 
## other attached packages:
##  [1] pd.mogene.2.0.st_3.14.1              DBI_1.1.0                           
##  [3] RSQLite_2.2.1                        cluster_2.1.0                       
##  [5] factoextra_1.0.7                     arrayQualityMetrics_3.44.0          
##  [7] lattice_0.20-41                      gridExtra_2.3                       
##  [9] cowplot_1.1.0                        ggpol_0.0.7                         
## [11] ggpubr_0.4.0                         harmonicmeanp_3.0                   
## [13] FMStable_0.1-2                       fgsea_1.14.0                        
## [15] UpSetR_1.4.0                         viridis_0.5.1                       
## [17] viridisLite_0.3.0                    RColorBrewer_1.1-2                  
## [19] pheatmap_1.0.12                      readxl_1.3.1                        
## [21] msigdbr_7.2.1                        oligo_1.52.1                        
## [23] Biostrings_2.56.0                    XVector_0.28.0                      
## [25] oligoClasses_1.50.4                  mogene20sttranscriptcluster.db_8.7.0
## [27] mogene20stprobeset.db_8.7.0          org.Mm.eg.db_3.11.4                 
## [29] AnnotationDbi_1.50.3                 IRanges_2.22.2                      
## [31] S4Vectors_0.26.1                     ggeasy_0.1.2                        
## [33] ggfortify_0.4.11                     kableExtra_1.3.1                    
## [35] affy_1.66.0                          GEOquery_2.56.0                     
## [37] Biobase_2.48.0                       BiocGenerics_0.34.0                 
## [39] limma_3.44.3                         magrittr_2.0.1                      
## [41] forcats_0.5.0                        stringr_1.4.0                       
## [43] dplyr_1.0.2                          purrr_0.3.4                         
## [45] readr_1.4.0                          tidyr_1.1.2                         
## [47] tibble_3.0.4                         ggplot2_3.3.2                       
## [49] tidyverse_1.3.0                     
## 
## loaded via a namespace (and not attached):
##   [1] utf8_1.1.4                  tidyselect_1.1.0           
##   [3] htmlwidgets_1.5.2           beadarray_2.38.0           
##   [5] BiocParallel_1.22.0         munsell_0.5.0              
##   [7] codetools_0.2-18            preprocessCore_1.50.0      
##   [9] statmod_1.4.35              withr_2.3.0                
##  [11] colorspace_2.0-0            highr_0.8                  
##  [13] knitr_1.30                  rstudioapi_0.13            
##  [15] setRNG_2013.9-1             ggsignif_0.6.0             
##  [17] labeling_0.4.2              GenomeInfoDbData_1.2.3     
##  [19] hwriter_1.3.2               farver_2.0.3               
##  [21] bit64_4.0.5                 vctrs_0.3.5                
##  [23] generics_0.1.0              xfun_0.19                  
##  [25] affxparser_1.60.0           R6_2.5.0                   
##  [27] GenomeInfoDb_1.24.2         illuminaio_0.30.0          
##  [29] gridSVG_1.7-2               bitops_1.0-6               
##  [31] DelayedArray_0.14.1         assertthat_0.2.1           
##  [33] scales_1.1.1                nnet_7.3-14                
##  [35] gtable_0.3.0                rlang_0.4.9                
##  [37] genefilter_1.70.0           systemfonts_0.3.2          
##  [39] splines_4.0.2               rstatix_0.6.0              
##  [41] hexbin_1.28.1               broom_0.7.2                
##  [43] checkmate_2.0.0             BiocManager_1.30.10        
##  [45] yaml_2.2.1                  reshape2_1.4.4             
##  [47] abind_1.4-5                 modelr_0.1.8               
##  [49] backports_1.2.0             Hmisc_4.4-1                
##  [51] tools_4.0.2                 affyio_1.58.0              
##  [53] ellipsis_0.3.1              ff_4.0.4                   
##  [55] Rcpp_1.0.5                  plyr_1.8.6                 
##  [57] base64enc_0.1-3             zlibbioc_1.34.0            
##  [59] RCurl_1.98-1.2              rpart_4.1-15               
##  [61] openssl_1.4.3               ggrepel_0.8.2              
##  [63] SummarizedExperiment_1.18.2 haven_2.3.1                
##  [65] fs_1.5.0                    data.table_1.13.2          
##  [67] openxlsx_4.2.3              reprex_0.3.0               
##  [69] matrixStats_0.57.0          hms_0.5.3                  
##  [71] evaluate_0.14               xtable_1.8-4               
##  [73] XML_3.99-0.5                rio_0.5.16                 
##  [75] jpeg_0.1-8.1                gcrma_2.60.0               
##  [77] compiler_4.0.2              crayon_1.3.4               
##  [79] htmltools_0.5.0             Formula_1.2-4              
##  [81] lubridate_1.7.9.2           dbplyr_2.0.0               
##  [83] Matrix_1.2-18               car_3.0-10                 
##  [85] cli_2.2.0                   vsn_3.56.0                 
##  [87] GenomicRanges_1.40.0        pkgconfig_2.0.3            
##  [89] foreign_0.8-80              xml2_1.3.2                 
##  [91] foreach_1.5.1               svglite_1.2.3.2            
##  [93] annotate_1.66.0             BeadDataPackR_1.40.0       
##  [95] affyPLM_1.64.0              webshot_0.5.2              
##  [97] rvest_0.3.6                 digest_0.6.27              
##  [99] rmarkdown_2.5               base64_2.0                 
## [101] cellranger_1.1.0            fastmatch_1.1-0            
## [103] htmlTable_2.1.0             gdtools_0.2.2              
## [105] curl_4.3                    lifecycle_0.2.0            
## [107] jsonlite_1.7.1              carData_3.0-4              
## [109] askpass_1.1                 fansi_0.4.1                
## [111] pillar_1.4.7                httr_1.4.2                 
## [113] survival_3.2-7              glue_1.4.2                 
## [115] zip_2.1.1                   png_0.1-7                  
## [117] iterators_1.0.13            bit_4.0.4                  
## [119] stringi_1.5.3               blob_1.2.1                 
## [121] latticeExtra_0.6-29         memoise_1.1.0